Consider the cohort as a single group to start. Do not stratify by case-control status.
Stack the heatmaps for CD4. Are there obvious patterns to the antigens?
Stack the heatmaps for NOTCD4. Are there patterns? Are these patterns different from CD4?
Calculate FS/PFS for each stimulation in CD4 and compare side-by-side in boxplots and test by ANOVA. Is there a clear winner? Calculate FS/PFS for each stimulation in NOTCD4 and compare with boxplot and ANOVA. Is there a winner? Is this different than CD4?
It may also make sense to make a corrplot of the FS/PFS from the different stims. This is asking a different question though, i.e. whether a patient’s cytokine responses tend to be consistently elevated/not-elevated across multiple stims.
Regress FS/PFS against age (linear). Is there an effect of age on T cell functionality? Regress FS/PFS against days from symptom onset (linear). Is there a decrease in functionality with time? Stratify FS/PFS by sex and test by t-test (or Mann-Whitney, though sample size is large enough). Is there a difference?
Only then would you start asking the case-control questions.
library(here)
library(tidyverse)
library(COMPASS)
library(ggplot2)
library(data.table)
library(broom)
library(psych)
library(corrplot)
library(ggpubr)
library(knitr)
library(kableExtra)
library(cowplot)
library(purrr)
library(flowWorkspace)
source(here::here("scripts/my_pheatmap_and_plotMeanGamma.R"))
source(here::here("scripts/20200604_Helper_Functions.R"))
save_output <- FALSE
# Next 4 lines from Run_COMPASS.R
stims_for_compass_runs <- c("VEMP", "Spike 1", "Spike 2", "NCAP")
parent_nodes_for_compass_runs <- c("4+", "NOT4+", "8+")
stims_for_compass_runs_rep <- rep(stims_for_compass_runs, each = length(parent_nodes_for_compass_runs))
parent_nodes_for_compass_runs_rep <- rep(parent_nodes_for_compass_runs, times = length(stims_for_compass_runs))
crList <- purrr::pmap(.l = list(parent_nodes_for_compass_runs_rep,
stims_for_compass_runs_rep),
.f = function(parent, currentStim) {
currentStimForFile <- gsub(" ", "_", currentStim)
crPath <- here::here(sprintf("out/CompassOutput/%s/%s/COMPASSResult_%s_%s.rds", parent, currentStimForFile, parent, currentStimForFile))
readRDS(crPath)
}) %>%
set_names(gsub("+", "", gsub(" ", "_", paste0(parent_nodes_for_compass_runs_rep, "_", stims_for_compass_runs_rep)), fixed=TRUE))
names(crList)
## [1] "4_VEMP" "NOT4_VEMP" "8_VEMP" "4_Spike_1" "NOT4_Spike_1"
## [6] "8_Spike_1" "4_Spike_2" "NOT4_Spike_2" "8_Spike_2" "4_NCAP"
## [11] "NOT4_NCAP" "8_NCAP"
CD4RunNames <- c("4_Spike_1", "4_Spike_2", "4_NCAP", "4_VEMP")
NotCD4RunNames <- c("NOT4_Spike_1", "NOT4_Spike_2", "NOT4_NCAP", "NOT4_VEMP")
CD8RunNames <- c("8_Spike_1", "8_Spike_2", "8_NCAP", "8_VEMP")
plotCOMPASSResultStack(crList[CD4RunNames],
row_annotation = c("Stim"),
variable = "Stim",
show_rownames = FALSE,
main = "CD4 COMPASS Runs",
fontsize = 15, fontsize_row = 15, fontsize_col = 13)
## The 'threshold' filter has removed 5 categories:
## !CD107a&!CD154&!IFNg&IL17a&!IL2&!IL4/5/13&!TNFa, !CD107a&!CD154&!IFNg&!IL17a&IL2&!IL4/5/13&TNFa, !CD107a&!CD154&IFNg&!IL17a&IL2&!IL4/5/13&!TNFa, CD107a&!CD154&!IFNg&!IL17a&!IL2&!IL4/5/13&TNFa, CD107a&!CD154&!IFNg&!IL17a&IL2&!IL4/5/13&!TNFa
plotCOMPASSResultStack(crList[NotCD4RunNames],
row_annotation = c("Stim"),
variable = "Stim",
show_rownames = FALSE,
main = "Not-CD4 COMPASS Runs",
fontsize = 15, fontsize_row = 15, fontsize_col = 13)
## The 'threshold' filter has removed 4 categories:
## !CD107a&!CD154&!IFNg&IL17a&!IL2&!IL4/5/13&!TNFa, !CD107a&!CD154&!IFNg&IL17a&!IL2&IL4/5/13&!TNFa, CD107a&!CD154&!IFNg&!IL17a&!IL2&!IL4/5/13&TNFa, !CD107a&CD154&!IFNg&IL17a&!IL2&IL4/5/13&!TNFa
plotCOMPASSResultStack(crList[CD8RunNames],
row_annotation = c("Stim"),
variable = "Stim",
show_rownames = FALSE,
main = "CD8 COMPASS Runs",
fontsize = 15, fontsize_row = 15, fontsize_col = 13)
## The 'threshold' filter has removed 4 categories:
## !CD107a&!CD154&!IFNg&!IL17a&!IL2&!IL4/5/13&TNFa, !CD107a&!CD154&!IFNg&!IL17a&IL2&!IL4/5/13&!TNFa, !CD107a&!CD154&!IFNg&IL17a&!IL2&!IL4/5/13&!TNFa, !CD107a&!CD154&!IFNg&IL17a&!IL2&IL4/5/13&!TNFa
plotCOMPASSResultStack(crList[c(CD4RunNames, NotCD4RunNames)],
row_annotation = c("Stim"),
variable = "Stim",
show_rownames = FALSE,
main = "CD4 and Not-CD4 COMPASS Runs",
fontsize = 15, fontsize_row = 15, fontsize_col = 13)
## The 'threshold' filter has removed 5 categories:
## !CD107a&!CD154&!IFNg&IL17a&!IL2&!IL4/5/13&!TNFa, !CD107a&!CD154&!IFNg&!IL17a&IL2&!IL4/5/13&TNFa, CD107a&!CD154&!IFNg&!IL17a&!IL2&!IL4/5/13&TNFa, CD107a&!CD154&!IFNg&!IL17a&IL2&!IL4/5/13&!TNFa, !CD107a&CD154&!IFNg&IL17a&!IL2&IL4/5/13&!TNFa
plotCOMPASSResultStack(crList[c(CD4RunNames, CD8RunNames)],
row_annotation = c("Stim"),
variable = "Stim",
show_rownames = FALSE,
main = "CD4 and CD8 COMPASS Runs",
fontsize = 15, fontsize_row = 15, fontsize_col = 13)
## The 'threshold' filter has removed 4 categories:
## !CD107a&!CD154&!IFNg&IL17a&!IL2&!IL4/5/13&!TNFa, !CD107a&!CD154&!IFNg&!IL17a&IL2&!IL4/5/13&TNFa, CD107a&!CD154&!IFNg&!IL17a&!IL2&!IL4/5/13&TNFa, CD107a&!CD154&!IFNg&!IL17a&IL2&!IL4/5/13&!TNFa
plotCOMPASSResultStack(crList[c(CD4RunNames, NotCD4RunNames, CD8RunNames)],
row_annotation = c("Stim"),
variable = "Stim",
show_rownames = FALSE,
main = "CD4, Not-CD4, and CD8 COMPASS Runs",
fontsize = 15, fontsize_row = 15, fontsize_col = 13)
## The 'threshold' filter has removed 7 categories:
## !CD107a&!CD154&!IFNg&IL17a&!IL2&!IL4/5/13&!TNFa, !CD107a&!CD154&!IFNg&!IL17a&IL2&!IL4/5/13&TNFa, CD107a&!CD154&!IFNg&!IL17a&!IL2&!IL4/5/13&TNFa, CD107a&!CD154&!IFNg&!IL17a&IL2&!IL4/5/13&!TNFa, !CD107a&!CD154&IFNg&!IL17a&IL2&IL4/5/13&!TNFa, !CD107a&CD154&!IFNg&IL17a&!IL2&IL4/5/13&!TNFa, !CD107a&CD154&!IFNg&!IL17a&IL2&IL4/5/13&TNFa
if(save_output) {
png(filename=here::here("out/PostCompassPlots/20200805_CD4_COMPASS_Heatmap_All_Stims.png"),
width=700, height=1000)
plotCOMPASSResultStack(crList[CD4RunNames],
row_annotation = c("Stim"),
variable = "Stim",
show_rownames = FALSE,
main = "CD4 COMPASS Runs",
fontsize = 14, fontsize_row = 13, fontsize_col = 13)
dev.off()
png(filename=here::here("out/PostCompassPlots/20200805_NotCD4_COMPASS_Heatmap_All_Stims.png"),
width=700, height=1000)
plotCOMPASSResultStack(crList[NotCD4RunNames],
row_annotation = c("Stim"),
variable = "Stim",
show_rownames = FALSE,
main = "Not-CD4 COMPASS Runs",
fontsize = 14, fontsize_row = 13, fontsize_col = 13)
dev.off()
png(filename=here::here("out/PostCompassPlots/20200805_CD8_COMPASS_Heatmap_All_Stims.png"),
width=700, height=1000)
plotCOMPASSResultStack(crList[CD8RunNames],
row_annotation = c("Stim"),
variable = "Stim",
show_rownames = FALSE,
main = "CD8 COMPASS Runs",
fontsize = 14, fontsize_row = 13, fontsize_col = 13)
dev.off()
png(filename=here::here("out/PostCompassPlots/20200805_CD4_and_NotCD4_COMPASS_Heatmap_All_Stims.png"),
width=900, height=2000)
plotCOMPASSResultStack(crList[c(CD4RunNames, NotCD4RunNames)],
row_annotation = c("Stim"),
variable = "Stim",
show_rownames = FALSE,
main = "CD4 and Not-CD4 COMPASS Runs",
fontsize = 14, fontsize_row = 13, fontsize_col = 13)
dev.off()
}
fs_pfs_df <- lapply(c(CD4RunNames, NotCD4RunNames, CD8RunNames), function(n) {
as.data.frame(COMPASS::FunctionalityScore(crList[[n]])) %>%
rownames_to_column("SAMPLE ID") %>%
rename(FS = `COMPASS::FunctionalityScore(crList[[n]])`) %>%
left_join(as.data.frame(COMPASS::PolyfunctionalityScore(crList[[n]])) %>%
rownames_to_column("SAMPLE ID") %>%
rename(PFS = `COMPASS::PolyfunctionalityScore(crList[[n]])`),
by = "SAMPLE ID") %>%
mutate(COMPASS_run = n)
} ) %>%
data.table::rbindlist() %>%
separate(COMPASS_run, into = c("parent", "STIM"), remove = F, extra = "merge") %>%
mutate(STIM = factor(STIM, levels = c("Spike_1", "Spike_2", "NCAP", "VEMP")))#,
#`SAMPLE ID` = factor(`SAMPLE ID`, levels = unique(`SAMPLE ID`)))
table(fs_pfs_df$STIM, fs_pfs_df$`SAMPLE ID`) # remove 551432/07B for quade.test in order to have complete block design
##
## 102C 103C 106C 109C 112C 113C 117C 121C 122C 127C 128C 12C 13 130C
## Spike_1 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## Spike_2 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## NCAP 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## VEMP 3 3 3 3 3 3 3 3 3 3 3 3 3 3
##
## 131C 133C 136C 139C 141C 142C 143C 147C 148C 150C 15514 15518 15529
## Spike_1 3 3 3 3 3 3 3 3 3 3 3 3 3
## Spike_2 3 3 3 3 3 3 3 3 3 3 3 3 3
## NCAP 3 3 3 3 3 3 3 3 3 3 3 3 3
## VEMP 3 3 3 3 3 3 3 3 3 3 3 3 3
##
## 15530 15545 15548 15554 15575 157C 159C 161C 162C 164C 169C 170C 1C
## Spike_1 3 3 3 3 3 3 3 3 3 3 3 3 3
## Spike_2 2 3 2 3 3 3 3 3 3 3 3 3 3
## NCAP 3 3 2 3 3 3 3 3 3 3 3 3 3
## VEMP 3 3 3 3 3 3 3 3 3 3 3 3 3
##
## 21C 23 25 25C 29C 36C 38C 51C 551432 56C 59C 69C 6C 76C 80C 90C 93C
## Spike_1 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## Spike_2 3 3 3 3 3 3 3 3 0 3 3 3 3 3 3 3 3
## NCAP 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## VEMP 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
##
## 96C BWT20 CLT1 HS09 HS10 HS8
## Spike_1 3 3 3 3 3 3
## Spike_2 3 2 3 3 3 3
## NCAP 3 2 3 3 3 3
## VEMP 3 3 3 3 3 3
fs_pfs_df.complete <- fs_pfs_df %>%
dplyr::filter(`SAMPLE ID` != "551432") %>%
arrange(STIM, `SAMPLE ID`)
fs_pfs_df.complete.4 <- fs_pfs_df.complete %>% dplyr::filter(parent == "4") %>%
mutate(`SAMPLE ID` = factor(`SAMPLE ID`, levels = unique(`SAMPLE ID`)))
fs_pfs_df.complete.not4 <- fs_pfs_df.complete %>% dplyr::filter(parent == "NOT4") %>%
mutate(`SAMPLE ID` = factor(`SAMPLE ID`, levels = unique(`SAMPLE ID`)))
# And for CD8 need to remove more individuals for complete block design
fs_pfs_df.complete.8 <- fs_pfs_df.complete %>% dplyr::filter(parent == "8") %>%
dplyr::filter(!(`SAMPLE ID` %in% c("BWT20", "15548", "15530"))) %>%
mutate(`SAMPLE ID` = factor(`SAMPLE ID`, levels = unique(`SAMPLE ID`)))
# Perform statistical tests for Functionality Score across Stims
# Quade test is to wilcoxon signed rank test like ANOVA is to t-test
quade_fs_4 <- quade.test(FS ~ STIM | `SAMPLE ID`, data = fs_pfs_df.complete.4)
quade_fs_4
##
## Quade test
##
## data: FS and STIM and SAMPLE ID
## Quade F = 78.717, num df = 3, denom df = 183, p-value < 2.2e-16
quade_fs_not4 <- quade.test(FS ~ STIM | `SAMPLE ID`, data = fs_pfs_df.complete.not4)
quade_fs_not4
##
## Quade test
##
## data: FS and STIM and SAMPLE ID
## Quade F = 45.948, num df = 3, denom df = 183, p-value < 2.2e-16
quade_fs_8 <- quade.test(FS ~ STIM | `SAMPLE ID`, data = fs_pfs_df.complete.8)
quade_fs_8
##
## Quade test
##
## data: FS and STIM and SAMPLE ID
## Quade F = 15.461, num df = 3, denom df = 174, p-value = 5.834e-09
pw_fs_4 <- pairwise.wilcox.test(fs_pfs_df.complete.4$FS,
fs_pfs_df.complete.4$STIM,
p.adjust.method="bonferroni",
paired=TRUE)
pw_fs_4$p.value
## Spike_1 Spike_2 NCAP
## Spike_2 1.151773e-01 NA NA
## NCAP 2.593592e-03 3.446827e-07 NA
## VEMP 6.244948e-11 4.659364e-11 5.395379e-11
broom::tidy(pw_fs_4)
## # A tibble: 6 x 3
## group1 group2 p.value
## <chr> <chr> <dbl>
## 1 Spike_2 Spike_1 1.15e- 1
## 2 NCAP Spike_1 2.59e- 3
## 3 VEMP Spike_1 6.24e-11
## 4 NCAP Spike_2 3.45e- 7
## 5 VEMP Spike_2 4.66e-11
## 6 VEMP NCAP 5.40e-11
pw_fs_not4 <- pairwise.wilcox.test(fs_pfs_df.complete.not4$FS,
fs_pfs_df.complete.not4$STIM,
p.adjust.method="bonferroni",
paired=TRUE)
pw_fs_not4$p.value
## Spike_1 Spike_2 NCAP
## Spike_2 3.230971e-01 NA NA
## NCAP 1.963071e-08 1.163716e-09 NA
## VEMP 5.273013e-05 1.342156e-02 1.717125e-10
broom::tidy(pw_fs_not4)
## # A tibble: 6 x 3
## group1 group2 p.value
## <chr> <chr> <dbl>
## 1 Spike_2 Spike_1 3.23e- 1
## 2 NCAP Spike_1 1.96e- 8
## 3 VEMP Spike_1 5.27e- 5
## 4 NCAP Spike_2 1.16e- 9
## 5 VEMP Spike_2 1.34e- 2
## 6 VEMP NCAP 1.72e-10
pw_fs_8 <- pairwise.wilcox.test(fs_pfs_df.complete.8$FS,
fs_pfs_df.complete.8$STIM,
p.adjust.method="bonferroni",
paired=TRUE)
pw_fs_8$p.value
## Spike_1 Spike_2 NCAP
## Spike_2 1.371785e-01 NA NA
## NCAP 6.151116e-05 2.024740e-03 NA
## VEMP 5.502367e-06 4.915498e-07 1
broom::tidy(pw_fs_8)
## # A tibble: 6 x 3
## group1 group2 p.value
## <chr> <chr> <dbl>
## 1 Spike_2 Spike_1 0.137
## 2 NCAP Spike_1 0.0000615
## 3 VEMP Spike_1 0.00000550
## 4 NCAP Spike_2 0.00202
## 5 VEMP Spike_2 0.000000492
## 6 VEMP NCAP 1
# Perform statistical tests for PolyFunctionality Score across Stims
quade_pfs_4 <- quade.test(PFS ~ STIM | `SAMPLE ID`, data = fs_pfs_df.complete.4)
quade_pfs_4
##
## Quade test
##
## data: PFS and STIM and SAMPLE ID
## Quade F = 63.08, num df = 3, denom df = 183, p-value < 2.2e-16
quade_pfs_not4 <- quade.test(PFS ~ STIM | `SAMPLE ID`, data = fs_pfs_df.complete.not4)
quade_pfs_not4
##
## Quade test
##
## data: PFS and STIM and SAMPLE ID
## Quade F = 34.079, num df = 3, denom df = 183, p-value < 2.2e-16
quade_pfs_8 <- quade.test(PFS ~ STIM | `SAMPLE ID`, data = fs_pfs_df.complete.8)
quade_pfs_8
##
## Quade test
##
## data: PFS and STIM and SAMPLE ID
## Quade F = 15.807, num df = 3, denom df = 174, p-value = 3.91e-09
pw_pfs_4 <- pairwise.wilcox.test(fs_pfs_df.complete.4$PFS,
fs_pfs_df.complete.4$STIM,
p.adjust.method="bonferroni",
paired=TRUE)
pw_pfs_4$p.value
## Spike_1 Spike_2 NCAP
## Spike_2 1.574931e-01 NA NA
## NCAP 1.000000e+00 2.378113e-05 NA
## VEMP 6.882751e-11 4.659364e-11 5.948135e-11
# broom::tidy(pw_pfs_4)
pw_pfs_not4 <- pairwise.wilcox.test(fs_pfs_df.complete.not4$PFS,
fs_pfs_df.complete.not4$STIM,
p.adjust.method="bonferroni",
paired=TRUE)
pw_pfs_not4$p.value
## Spike_1 Spike_2 NCAP
## Spike_2 1.000000e+00 NA NA
## NCAP 6.620103e-08 9.866667e-09 NA
## VEMP 4.440744e-02 1.632616e-01 1.014449e-09
# broom::tidy(pw_pfs_not4)
pw_pfs_8 <- pairwise.wilcox.test(fs_pfs_df.complete.8$PFS,
fs_pfs_df.complete.8$STIM,
p.adjust.method="bonferroni",
paired=TRUE)
pw_pfs_8$p.value
## Spike_1 Spike_2 NCAP
## Spike_2 5.165442e-03 NA NA
## NCAP 1.307259e-04 1.192973e-01 NA
## VEMP 2.420840e-07 4.915498e-07 1
# broom::tidy(pw_pfs_8)
# Now plot the results
# Note that the dataset used in statistics excludes 551432, but 551432 is plotted below (and the median bar is calculated including 551432)
# Same goes for "BWT20", "15548", "15530" in CD8 tests
parent.labs <- c("CD4", "Not-CD4", "CD8")
names(parent.labs) <- c("4", "NOT4", "8")
plot_fs_pfs_vs_stim <- function(fs_or_pfs) {
ggplot(fs_pfs_df,
aes(STIM, !!as.name(fs_or_pfs))) +
theme_bw(base_size = 22) +
geom_line(aes(group = `SAMPLE ID`), color="grey") +
geom_point(color="grey") +
facet_grid(. ~ parent,
labeller = labeller(parent = parent.labs)) +
geom_errorbarh(data = fs_pfs_df %>% group_by(parent, STIM) %>% summarise(med = median(!!as.name(fs_or_pfs))),
aes(y = med,
xmax = 1.5 + 0.35,
xmin = 1.5 - 0.35,
height = 0),
position=position_dodge(width=0), color = "black") +
labs(title = sprintf("%s vs Stim", fs_or_pfs),
y = fs_or_pfs) +
theme(axis.title.x = element_blank(),
axis.title.y = element_text(size=22),
axis.text.y = element_text(color="black", size=22),
axis.text.x = element_text(color="black", size=22),
plot.title = element_text(hjust = 0.5),
panel.grid = element_blank(),
legend.position = "none",
plot.margin = margin(1.3, 0.2, 0, 0.2, "cm")) +
scale_x_discrete(labels=c(c("Spike_1" = "Spike 1", "Spike_2" = "Spike 2")),
expand = c(0.1,0.1))
}
plot_fs_pfs_vs_stim("FS")
# Remembering these p-values are adjusted
as.data.frame(pw_fs_4$p.value) %>%
rownames_to_column('tmp') %>%
mutate_all(~cell_spec(.x, color = ifelse(!is.na(.x) & .x < 0.05, "red", "black"))) %>%
column_to_rownames('tmp') %>%
kable(format = "html", escape = F, row.names = T) %>%
kable_styling("striped", full_width = F)
| Spike_1 | Spike_2 | NCAP | |
|---|---|---|---|
| Spike_2 | 0.115177317170495 | NA | NA |
| NCAP | 0.00259359228882635 | 3.4468269894795e-07 | NA |
| VEMP | 6.24494803673655e-11 | 4.65936369334732e-11 | 5.39537888428502e-11 |
as.data.frame(pw_fs_not4$p.value) %>%
rownames_to_column('tmp') %>%
mutate_all(~cell_spec(.x, color = ifelse(!is.na(.x) & .x < 0.05, "red", "black"))) %>%
column_to_rownames('tmp') %>%
kable(format = "html", escape = F, row.names = T) %>%
kable_styling("striped", full_width = F)
| Spike_1 | Spike_2 | NCAP | |
|---|---|---|---|
| Spike_2 | 0.323097099628218 | NA | NA |
| NCAP | 1.96307141898972e-08 | 1.16371623364348e-09 | NA |
| VEMP | 5.27301309207659e-05 | 0.0134215615062708 | 1.717125234786e-10 |
as.data.frame(pw_fs_8$p.value) %>%
rownames_to_column('tmp') %>%
mutate_all(~cell_spec(.x, color = ifelse(!is.na(.x) & .x < 0.05, "red", "black"))) %>%
column_to_rownames('tmp') %>%
kable(format = "html", escape = F, row.names = T) %>%
kable_styling("striped", full_width = F)
| Spike_1 | Spike_2 | NCAP | |
|---|---|---|---|
| Spike_2 | 0.137178525432516 | NA | NA |
| NCAP | 6.15111576317468e-05 | 0.00202474006060593 | NA |
| VEMP | 5.50236697392487e-06 | 4.91549764427344e-07 | 1 |
plot_fs_pfs_vs_stim("PFS")
as.data.frame(pw_pfs_4$p.value) %>%
rownames_to_column('tmp') %>%
mutate_all(~cell_spec(.x, color = ifelse(!is.na(.x) & .x < 0.05, "red", "black"))) %>%
column_to_rownames('tmp') %>%
kable(format = "html", escape = F, row.names = T) %>%
kable_styling("striped", full_width = F)
| Spike_1 | Spike_2 | NCAP | |
|---|---|---|---|
| Spike_2 | 0.157493084727601 | NA | NA |
| NCAP | 1 | 2.37811284128217e-05 | NA |
| VEMP | 6.88275106455714e-11 | 4.65936369334732e-11 | 5.948134864697e-11 |
as.data.frame(pw_pfs_not4$p.value) %>%
rownames_to_column('tmp') %>%
mutate_all(~cell_spec(.x, color = ifelse(!is.na(.x) & .x < 0.05, "red", "black"))) %>%
column_to_rownames('tmp') %>%
kable(format = "html", escape = F, row.names = T) %>%
kable_styling("striped", full_width = F)
| Spike_1 | Spike_2 | NCAP | |
|---|---|---|---|
| Spike_2 | 1 | NA | NA |
| NCAP | 6.62010338999915e-08 | 9.86666729441403e-09 | NA |
| VEMP | 0.0444074361462703 | 0.163261630177697 | 1.01444884747321e-09 |
as.data.frame(pw_pfs_8$p.value) %>%
rownames_to_column('tmp') %>%
mutate_all(~cell_spec(.x, color = ifelse(!is.na(.x) & .x < 0.05, "red", "black"))) %>%
column_to_rownames('tmp') %>%
kable(format = "html", escape = F, row.names = T) %>%
kable_styling("striped", full_width = F)
| Spike_1 | Spike_2 | NCAP | |
|---|---|---|---|
| Spike_2 | 0.00516544163845711 | NA | NA |
| NCAP | 0.000130725943852043 | 0.119297317838269 | NA |
| VEMP | 2.42083959561873e-07 | 4.91549764427344e-07 | 1 |
As we saw in the heatmaps, NCAP and Spike 1 consistently have the highest FS/PFS scores, and VEMP has the lowest.
# Read in the patient manifest with complete data (though it's also in the COMPASSResult object)
data_manifest <- read.csv(here::here("data/Seshadri_HAARVI_PBMC_manifest_merged_11June2020.csv"), check.names = F, stringsAsFactors = F)
setdiff(sort(unique(fs_pfs_df$`SAMPLE ID`)), sort(unique(data_manifest$`Record ID`)))
## [1] "BWT20" "CLT1" "HS09"
setdiff(sort(unique(data_manifest$`Record ID`)), sort(unique(fs_pfs_df$`SAMPLE ID`)))
## [1] "116C" "37C" "75C" "HS9"
# Remember 116C and 37C didn't get run through COMPASS
# 75C was not run at all
# 07B/551432 didn't get run for Spike 2
# For CD8 only: "BWT20", "15548" didn't get run for "Spike 2", "NCAP", and 15530 didn't get run for "Spike 2"
fs_pfs_df_with_manifest <- fs_pfs_df %>%
mutate("Record ID" = dplyr::recode(`SAMPLE ID`,
"HS09" = "HS9")) %>% # also, 75C did not get run
full_join(data_manifest, by = c("Record ID" = "Record ID")) %>%
dplyr::filter(!(`Record ID` %in% c("116C", "37C", "75C"))) %>%
mutate(Days_Symptom_Onset_to_Visit_1 = as.numeric(`Days symptom onset to visit 1`),
Cohort = factor(ifelse(Cohort %in%
c(NA, "Healthy control", "Healthy control 2017-2018"), "Healthy", Cohort),
levels = c("Healthy", "Non-hospitalized", "Hospitalized")))
## Warning: NAs introduced by coercion
fs_pfs_df_with_manifest %>% dplyr::filter(is.na(FS) | is.na(`Record ID`) | is.na(`SAMPLE ID`) | is.na(STIM) | is.na(Cohort))
## [1] SAMPLE ID FS
## [3] PFS COMPASS_run
## [5] parent STIM
## [7] Record ID Sample ID
## [9] Collection date Cell count
## [11] Cohort Age
## [13] Sex Race
## [15] Hispanic? Days symptom onset to visit 1
## [17] Pair ID Race_v2
## [19] Days_Symptom_Onset_to_Visit_1
## <0 rows> (or 0-length row.names)
# Note that p-values on ggscatter plots below are *not* adjusted, e.g. compare below results to this unadjusted p-value:
broom::tidy(lm(FS ~ Age, data = fs_pfs_df_with_manifest %>% dplyr::filter(parent == "4" & STIM == "Spike_2")))
## # A tibble: 2 x 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 0.0400 0.00725 5.51 0.000000867
## 2 Age 0.000215 0.000132 1.63 0.108
ggscatter(fs_pfs_df_with_manifest,
x = "Age", y = "FS",
color = "STIM", palette = "jco",
add = "reg.line") +
facet_grid(parent ~ STIM) +
stat_cor()
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 22 rows containing non-finite values (stat_smooth).
## Warning: Removed 22 rows containing non-finite values (stat_cor).
## Warning: Removed 22 rows containing missing values (geom_point).
ggscatter(fs_pfs_df_with_manifest,
x = "Age", y = "PFS",
color = "STIM", palette = "jco",
add = "reg.line") +
facet_grid(parent ~ STIM) +
stat_cor()
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 22 rows containing non-finite values (stat_smooth).
## Warning: Removed 22 rows containing non-finite values (stat_cor).
## Warning: Removed 22 rows containing missing values (geom_point).
# Note that p-values on ggscatter plots below are *not* adjusted, e.g.:
broom::tidy(lm(FS ~ Days_Symptom_Onset_to_Visit_1, data = fs_pfs_df_with_manifest %>% dplyr::filter(parent == "4" & STIM == "Spike_2")))
## # A tibble: 2 x 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 0.0493 0.00648 7.60 3.88e-10
## 2 Days_Symptom_Onset_to_Visit_1 0.0000661 0.000124 0.534 5.95e- 1
# Note that the Healthies don't have values for Days_Symptom_Onset_to_Visit_1 and will get filtered out below
ggscatter(fs_pfs_df_with_manifest,
x = "Days_Symptom_Onset_to_Visit_1", y = "FS",
color = "STIM", palette = "jco",
add = "reg.line") +
facet_grid(parent ~ STIM) +
stat_cor()
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 67 rows containing non-finite values (stat_smooth).
## Warning: Removed 67 rows containing non-finite values (stat_cor).
## Warning: Removed 67 rows containing missing values (geom_point).
ggscatter(fs_pfs_df_with_manifest,
x = "Days_Symptom_Onset_to_Visit_1", y = "PFS",
color = "STIM", palette = "jco",
add = "reg.line") +
facet_grid(parent ~ STIM) +
stat_cor()
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 67 rows containing non-finite values (stat_smooth).
## Warning: Removed 67 rows containing non-finite values (stat_cor).
## Warning: Removed 67 rows containing missing values (geom_point).
For the CD4 FS vs Age plots, there is a positive association of Age with FS for Spike 1 (at least).
# Note that p-values on ggboxplot plots below are *not* adjusted, e.g.:
broom::tidy(wilcox.test(FS ~ Sex, data = fs_pfs_df_with_manifest %>% dplyr::filter(parent == "4" & STIM == "Spike_2")))
## # A tibble: 1 x 4
## statistic p.value method alternative
## <dbl> <dbl> <chr> <chr>
## 1 353 0.157 Wilcoxon rank sum test two.sided
# Note that future ggpubr update should make outlier.shape=NA occur by default when jitter is added, but type out the parameter for now
ggboxplot(fs_pfs_df_with_manifest %>%
dplyr::filter(!is.na(Sex)),
x = "Sex", y = "FS",
color = "Sex", palette = "jco",
add = "jitter",
outlier.shape = NA) +
facet_grid(parent ~ STIM) +
stat_compare_means(aes(group = Sex))
ggboxplot(fs_pfs_df_with_manifest %>%
dplyr::filter(!is.na(Sex)),
x = "Sex", y = "PFS",
color = "Sex", palette = "jco",
add = "jitter",
outlier.shape = NA) +
facet_grid(parent ~ STIM) +
stat_compare_means(aes(group = Sex))
In many of the boxplots above, Males are shifted slightly higher on the FS/PFS axis compared to Females, but none of the comparisons are significant. Perhaps a t-test or un-stratified plot would be significant.
Look at the heatmaps again. Also focus on IFNg
Stack the plots from the different stims, this time with IFNg subsets on the RHS
A commonality to the subsets which are enriched in Hospitalized individuals is CD154.
# First change any NA Cohort levels to "Healthy"
crList2 <- lapply(crList,
function(cr) {
cr$data$meta$Cohort <- factor(ifelse(cr$data$meta$Cohort %in%
c(NA, "Healthy control", "Healthy control 2017-2018"), "Healthy", cr$data$meta$Cohort),
levels = rev(c("Healthy", "Non-hospitalized", "Hospitalized")))
cr
})
names(crList2) <- names(crList)
CohortColors <- c("Healthy" = "#dfc27d", "Non-hospitalized" = "#80cdc1", "Hospitalized" = "#018571")
row_ann_colors <- list(`Cohort` = CohortColors)
cytokine_annotation_colors <- rep("black", 30)
cytokine_order_for_annotation = c("IFNg", "IL2", "TNFa", "CD154", "CD107a", "IL4/5/13", "IL17a")
# Adapt code from COMPASS::plotCompassResultStack to prepare the data for my_plot.COMPASSResult
# First we use mergeMatricesForPlotCOMPASSResultStack to merge the data from the different runs together.
# Then use the merged data to create a fake merged COMPASSResult object with the merged categories, metadata, and mean_gamma data frames
# This should allow us to use some of the customized options in my_plot.COMPASSResult (e.g. to get the IFNg subsets on the RHS)
mergeMatricesForPlotCOMPASSResultStack_tmp <- utils::getFromNamespace("mergeMatricesForPlotCOMPASSResultStack", "COMPASS")
CohortColors <- c("Hospitalized" = "#083b31", "Non-hospitalized" = "#018571", "Healthy" = "#80cdc1")
# CohortColors <- c("Hospitalized" = "#9da4ce", "Non-hospitalized" = "#c1ddd7", "Healthy" = "#ebeef0") # To match Ab data color scheme?
# CohortColors <- c("Hospitalized" = "#5c6494", "Non-hospitalized" = "#728a84", "Healthy" = "#ebeef0")
StimColors <- c("S1" = "#fc68be", "S2" = "#d94e09", "NCAP" = "#6B49F2", "VEMP" = "#D0F249")
row_ann_colors_v2 <- list(`Stim` = StimColors, `Cohort` = CohortColors)
# CD4 runs
cd4_data2plot <- mergeMatricesForPlotCOMPASSResultStack_tmp(crList2[CD4RunNames],
threshold=0.01,
minimum_dof=1,
maximum_dof=Inf,
row_annotation = c("Stim", "Cohort"),
variable = "Stim")
## The 'threshold' filter has removed 5 categories:
## !CD107a&!CD154&!IFNg&IL17a&!IL2&!IL4/5/13&!TNFa, !CD107a&!CD154&!IFNg&!IL17a&IL2&!IL4/5/13&TNFa, !CD107a&!CD154&IFNg&!IL17a&IL2&!IL4/5/13&!TNFa, CD107a&!CD154&!IFNg&!IL17a&!IL2&!IL4/5/13&TNFa, CD107a&!CD154&!IFNg&!IL17a&IL2&!IL4/5/13&!TNFa
cd4_merged_COMPASSResult <- crList2$`4_Spike_1`
cd4_merged_COMPASSResult$fit$mean_gamma <- cd4_data2plot$MMerged
cd4_merged_COMPASSResult$fit$categories <- cd4_data2plot$catsMerged %>% mutate_all(~ as.numeric(as.character(.))) %>% mutate(Counts = rowSums(.))
rownames(cd4_merged_COMPASSResult$fit$categories) <- rownames(cd4_data2plot$catsMerged)
cd4_merged_COMPASSResult$data$meta <- cd4_data2plot$rowannMerged
cd4_merged_COMPASSResult$data$meta$Run_SAMPLE_ID <- rownames(cd4_merged_COMPASSResult$data$meta)
cd4_merged_COMPASSResult$data$meta$Stim <- factor(dplyr::recode(sub("^4_", "", cd4_merged_COMPASSResult$data$meta$Stim), "Spike_1" = "S1", "Spike_2" = "S2"), levels = c("S1", "S2", "NCAP", "VEMP"))
cd4_merged_COMPASSResult$data$meta$Cohort <- factor(cd4_merged_COMPASSResult$data$meta$Cohort, levels = c("Hospitalized", "Non-hospitalized", "Healthy"))
cd4_merged_COMPASSResult$data$individual_id <- "Run_SAMPLE_ID"
cd4_heatmap_by_ifng <- print(my_plot.COMPASSResult(cd4_merged_COMPASSResult,
y=c("Stim", "Cohort"),
fontsize=25, fontsize_row=5, fontsize_col=22,
row_annotation_colors=row_ann_colors_v2,
cytokine_annotation_colors=cytokine_annotation_colors,
cytokine_height_multiplier=1.6, order_by_max_functionality = T,
cytokine_order_for_annotation = c("CD154", "IL2", "TNFa",
"CD107a", "IL4/5/13", "IL17a", "IFNg"),
dichotomize_by_cytokine = "IFNg",
dichotomize_by_cytokine_color = "#999999",
staircase_cytokine_annotation = TRUE,
main="CD4 COMPASS Runs"))
## The 'threshold' filter has removed 0 categories:
## gTree[GRID.gTree.6352]
# CD8 runs
cd8_data2plot <- mergeMatricesForPlotCOMPASSResultStack_tmp(crList2[CD8RunNames],
threshold=0.01,
minimum_dof=1,
maximum_dof=Inf,
row_annotation = c("Stim", "Cohort"),
variable = "Stim")
## The 'threshold' filter has removed 4 categories:
## !CD107a&!CD154&!IFNg&!IL17a&!IL2&!IL4/5/13&TNFa, !CD107a&!CD154&!IFNg&!IL17a&IL2&!IL4/5/13&!TNFa, !CD107a&!CD154&!IFNg&IL17a&!IL2&!IL4/5/13&!TNFa, !CD107a&!CD154&!IFNg&IL17a&!IL2&IL4/5/13&!TNFa
cd8_merged_COMPASSResult <- crList2$`8_Spike_1`
cd8_merged_COMPASSResult$fit$mean_gamma <- cd8_data2plot$MMerged
cd8_merged_COMPASSResult$fit$categories <- cd8_data2plot$catsMerged %>% mutate_all(~ as.numeric(as.character(.))) %>% mutate(Counts = rowSums(.))
rownames(cd8_merged_COMPASSResult$fit$categories) <- rownames(cd8_data2plot$catsMerged)
cd8_merged_COMPASSResult$data$meta <- cd8_data2plot$rowannMerged
cd8_merged_COMPASSResult$data$meta$Run_SAMPLE_ID <- rownames(cd8_merged_COMPASSResult$data$meta)
cd8_merged_COMPASSResult$data$meta$Stim <- factor(dplyr::recode(sub("^8_", "", cd8_merged_COMPASSResult$data$meta$Stim), "Spike_1" = "S1", "Spike_2" = "S2"), levels = c("S1", "S2", "NCAP", "VEMP"))
cd8_merged_COMPASSResult$data$meta$Cohort <- factor(cd8_merged_COMPASSResult$data$meta$Cohort, levels = c("Hospitalized", "Non-hospitalized", "Healthy"))
cd8_merged_COMPASSResult$data$individual_id <- "Run_SAMPLE_ID"
cd8_heatmap_by_ifng <- print(my_plot.COMPASSResult(cd8_merged_COMPASSResult,
y=c("Stim", "Cohort"),
fontsize=25, fontsize_row=5, fontsize_col=22,
row_annotation_colors=row_ann_colors_v2,
cytokine_annotation_colors=cytokine_annotation_colors,
cytokine_height_multiplier=1.6, order_by_max_functionality = T,
cytokine_order_for_annotation = c("CD154", "IL2", "TNFa",
"CD107a", "IL4/5/13", "IL17a", "IFNg"),
dichotomize_by_cytokine = "IFNg",
dichotomize_by_cytokine_color = "#999999",
staircase_cytokine_annotation = TRUE,
main="CD8 COMPASS Runs"))
## The 'threshold' filter has removed 0 categories:
## gTree[GRID.gTree.6661]
if(save_output) {
# Arial font setup. Downloaded afms from https://github.com/microsoft/microsoft-r-open/tree/ec3fd89e5fb5794bd8149905c134ad801bb61800
Arial <- Type1Font(family = "Arial",
metrics = c(here::here("data/Arial_afm/ArialMT.afm"),
here::here("data/Arial_afm/ArialMT-Bold.afm"),
here::here("data/Arial_afm/ArialMT-Italic.afm"),
here::here("data/Arial_afm/ArialMT-BoldItalic.afm")))
pdfFonts(Arial = Arial)
pdf(file=here::here("out/PostCompassPlots/20200810_CD4_COMPASS_Heatmap.pdf"), width=12, height=8,
onefile = TRUE, bg = "transparent", family = "Arial", fonts = "Arial") # default unit is inches, default font is Helvetica. pheatmap calls grid.text, which uses Arial by default.
grid.draw(cd4_heatmap_by_ifng)
dev.off()
pdf(file=here::here("out/PostCompassPlots/20200810_CD8_COMPASS_Heatmap.pdf"), width=10, height=8,
onefile = TRUE, bg = "transparent", family = "Arial", fonts = "Arial") # default unit is inches, default font is Helvetica. pheatmap calls grid.text, which uses Arial by default.
grid.draw(cd8_heatmap_by_ifng)
dev.off()
}
# Note that p-values on ggboxplot plots below are *not* adjusted, e.g.:
broom::tidy(wilcox.test(FS ~ Cohort, data = fs_pfs_df_with_manifest %>% dplyr::filter(Cohort != "Healthy" & parent == "4" & STIM == "Spike_2")))
## # A tibble: 1 x 4
## statistic p.value method alternative
## <dbl> <dbl> <chr> <chr>
## 1 152 0.000164 Wilcoxon rank sum test two.sided
# Note that future ggpubr update should make outlier.shape=NA occur by default when jitter is added, but include for now
ggboxplot(fs_pfs_df_with_manifest %>%
dplyr::filter(Cohort != "Healthy"),
x = "Cohort", y = "FS",
color = "Cohort", palette = "jco",
add = "jitter",
outlier.shape = NA) +
facet_grid(parent ~ STIM) +
stat_compare_means(aes(group = Cohort)) +
scale_x_discrete(labels=c("Non-hospitalized" = "Conv\nNon-Hosp", "Hospitalized" = "Conv\nHosp"))
ggboxplot(fs_pfs_df_with_manifest %>%
dplyr::filter(Cohort != "Healthy"),
x = "Cohort", y = "PFS",
color = "Cohort", palette = "jco",
add = "jitter",
outlier.shape = NA) +
facet_grid(parent ~ STIM) +
stat_compare_means(aes(group = Cohort)) +
scale_x_discrete(labels=c("Non-hospitalized" = "Conv\nNon-Hosp", "Hospitalized" = "Conv\nHosp"))
crList.no_healthy <- lapply(names(crList2),
function(cr_name) {
cr <- crList2[[cr_name]]
print(sprintf("Accessing %s", cr_name))
cr$data$meta <- cr$data$meta %>%
# Filter out samples which weren't run through COMPASS
dplyr::filter(!(`SAMPLE ID` %in% setdiff(cr$data$meta$`SAMPLE ID`, rownames(cr$fit$mean_gamma))))
stopifnot(all.equal(rownames(cr$fit$mean_gamma), cr$data$meta$`SAMPLE ID`))
# And make sure that the count data only includes counts for the samples which were run through COMPASS
stopifnot(all.equal(rownames(cr$data$n_s), rownames(cr$fit$mean_gamma)))
stopifnot(all.equal(rownames(cr$data$n_u), rownames(cr$fit$mean_gamma)))
stopifnot(all.equal(names(cr$data$counts_s), rownames(cr$fit$mean_gamma)))
stopifnot(all.equal(names(cr$data$counts_u), rownames(cr$fit$mean_gamma)))
not_healthy_idx <- which(cr$data$meta$Cohort != "Healthy")
cr$data$meta <- cr$data$meta[not_healthy_idx,] %>%
mutate(Cohort = factor(Cohort, levels = c("Non-hospitalized", "Hospitalized")))
cr$fit$mean_gamma <- cr$fit$mean_gamma[not_healthy_idx,]
cr$data$n_s <- cr$data$n_s[not_healthy_idx,]
cr$data$n_u <- cr$data$n_u[not_healthy_idx,]
cr$data$counts_s <- cr$data$counts_s[not_healthy_idx]
cr$data$counts_u <- cr$data$counts_u[not_healthy_idx]
cr
})
## [1] "Accessing 4_VEMP"
## [1] "Accessing NOT4_VEMP"
## [1] "Accessing 8_VEMP"
## [1] "Accessing 4_Spike_1"
## [1] "Accessing NOT4_Spike_1"
## [1] "Accessing 8_Spike_1"
## [1] "Accessing 4_Spike_2"
## [1] "Accessing NOT4_Spike_2"
## [1] "Accessing 8_Spike_2"
## [1] "Accessing 4_NCAP"
## [1] "Accessing NOT4_NCAP"
## [1] "Accessing 8_NCAP"
names(crList.no_healthy) <- names(crList2)
source(here::here("scripts/20200604_Helper_Functions.R"))
# Arial is used by dotplot function
# Arial font setup. Downloaded afms from https://github.com/microsoft/microsoft-r-open/tree/ec3fd89e5fb5794bd8149905c134ad801bb61800
Arial <- Type1Font(family = "Arial",
metrics = c(here::here("data/Arial_afm/ArialMT.afm"),
here::here("data/Arial_afm/ArialMT-Bold.afm"),
here::here("data/Arial_afm/ArialMT-Italic.afm"),
here::here("data/Arial_afm/ArialMT-BoldItalic.afm")))
pdfFonts(Arial = Arial)
# Note p-values are bonferroni adjusted in dotplots
# Also note that some points are not shown in order to zoom in better to the bulk of the point
bg_corr_dotplots <- pmap(.l = list(c(CD4RunNames, NotCD4RunNames, CD8RunNames),
rep(c("CD4+", "Not-CD4+", "CD8"), each = 4),
list(c(-0.0013, 0.009), c(-0.0018, 0.0085), c(-0.002, 0.0048), NULL,
c(-0.002, 0.0024), c(-0.0012, 0.004), c(-0.002, 0.005), NULL,
c(-0.0025, 0.0065), NULL, c(-0.0015, 0.005), NULL),
c(5, 5, 3, 5,
5, 5, 5, 5,
5, 5, 5, 5)),
.f = function(n, p, y, p_text_size) {
# "staircase effect" for categories df heatmap gets done automatically, unlike in my_plot.COMPASSResult
# Returned dotplot is cowplot
make_dotplot_for_COMPASS_run(crList.no_healthy[[n]], n, parentSubset = p,
return_output = T, current_ylim = y, include_0_line=T,
cytokine_order_for_annotation=cytokine_order_for_annotation,
p_text_size=p_text_size)
})
## Joining, by = "BooleanSubset"
## Joining, by = "BooleanSubset"
## Joining, by = "BooleanSubset"
## Joining, by = "BooleanSubset"
## Joining, by = "BooleanSubset"
## Joining, by = "BooleanSubset"
## Joining, by = "BooleanSubset"
## Joining, by = "BooleanSubset"
## Joining, by = "BooleanSubset"
## Joining, by = "BooleanSubset"
## Joining, by = "BooleanSubset"
## Joining, by = "BooleanSubset"
## Joining, by = "BooleanSubset"
## Joining, by = "BooleanSubset"
## Joining, by = "BooleanSubset"
## Joining, by = "BooleanSubset"
## Joining, by = "BooleanSubset"
## Joining, by = "BooleanSubset"
## Joining, by = "BooleanSubset"
## Joining, by = "BooleanSubset"
## Joining, by = "BooleanSubset"
## Joining, by = "BooleanSubset"
## Joining, by = "BooleanSubset"
## Joining, by = "BooleanSubset"
names(bg_corr_dotplots) <- c(CD4RunNames, NotCD4RunNames, CD8RunNames)
for(n in names(bg_corr_dotplots)) {
print(plot_grid(ggplot() +
theme_void() +
geom_text(aes(0,0,label=n), size=10) +
xlab(NULL),
bg_corr_dotplots[[n]]$Dotplot,
ncol=1, rel_heights = c(0.1, 1)))
}
sig_tests_tally <- lapply(names(bg_corr_dotplots), function(n) {length(which(bg_corr_dotplots[[n]]$Test_Results$p.adj < 0.05))})
names(sig_tests_tally) <- names(bg_corr_dotplots)
sig_tests_tally # make sure all significant comparisons were plotted and accounted for
## $`4_Spike_1`
## [1] 9
##
## $`4_Spike_2`
## [1] 4
##
## $`4_NCAP`
## [1] 5
##
## $`4_VEMP`
## [1] 0
##
## $NOT4_Spike_1
## [1] 2
##
## $NOT4_Spike_2
## [1] 1
##
## $NOT4_NCAP
## [1] 1
##
## $NOT4_VEMP
## [1] 0
##
## $`8_Spike_1`
## [1] 0
##
## $`8_Spike_2`
## [1] 0
##
## $`8_NCAP`
## [1] 0
##
## $`8_VEMP`
## [1] 0
Save to disk: background-corrected magnitudes from subsets which were significantly different across hospitalization status. For integrated analysis with other T cell and Ab data.
if(save_output) {
signif_bgcorr_dat_list <- lapply(c(CD4RunNames, CD8RunNames), function(runName) {
signif_subset_names <- bg_corr_dotplots[[runName]]$Test_Results %>% dplyr::filter(p.adj < 0.05) %>% dplyr::pull(BooleanSubset)
bg_corr_dotplots[[runName]]$BgCorrMagnitudes %>%
rename("SAMPLE_ID" = "Individual") %>%
dplyr::select(c("SAMPLE_ID", signif_subset_names)) %>%
rename_at(vars(signif_subset_names), ~ paste("CD", sub("Spike_", "S", runName), " ", ., sep = ""))
})
names(signif_bgcorr_dat_list) <- c(CD4RunNames, CD8RunNames)
bgcorr_dat_2save <- purrr::reduce(signif_bgcorr_dat_list, full_join, by = "SAMPLE_ID") %>%
# Convert proportions into percents
# Convert the numeric columns to character type after rounding to 20 digits. This will help ensure consistency when writing and reading the data to a file
mutate_at(vars(-"SAMPLE_ID"), ~ format(. * 100, digits = 20))
write.csv(bgcorr_dat_2save, here::here("processed_data/20200813_HAARVI_Signif_COMPASS_Subsets_Background_Corrected_Percents_pt1.csv"), row.names = F)
# bgcorr_dat_2save <- read.csv(here::here("processed_data/20200813_HAARVI_Signif_COMPASS_Subsets_Background_Corrected_Percents_pt1.csv"), stringsAsFactors = F, check.names = F)
# all.equal(bgcorr_dat_2save %>% mutate_at(vars(-"SAMPLE_ID"), as.numeric),
# read.csv(here::here("processed_data/20200813_HAARVI_Signif_COMPASS_Subsets_Background_Corrected_Percents_pt1.csv"), stringsAsFactors = F, check.names = F))
}